if (require("PageRank")) {
library(PageRank)
}else{
devtools::install_github("ryangreenup/PageRank")
library(PageRank)
}
Loading required package: PageRank
library(pacman)
pacman::p_load(PageRank, devtools, Matrix, igraph, mise, tidyverse, rgl, latex2exp)
# mise()
Define some constants
p <- seq(from = 0.01, to = 0.99, length.out = 10)
beta <- seq(from = 1, to = 20, length.out = 40)
sz <- seq(from = 100, to = 10, length.out = 5)
input_var <- expand.grid("p" = p, "beta" = beta, "size" = sz)
input_var
random_graph <- function(p, beta, size) {
g1 <- igraph::erdos.renyi.game(n = size, p)
A <- igraph::get.adjacency(g1) # Row to column
A <- Matrix::t(A)
A_dens <- mean(A)
T <- PageRank::power_walk_prob_trans(A, beta = beta)
tr <- sum(diag(T))
e2 <- eigen(T, only.values = TRUE)$values[2] # R orders by descending magnitude
return(c(abs(e2), mean(A), tr))
}
Map the function
nc <- length(random_graph(1, 1, 1))
Y <- matrix(ncol = nc, nrow = nrow(input_var))
for (i in 1:nrow(input_var)) {
X <- as.vector(input_var[i,])
Y[i,] <- random_graph(X$p, X$beta, X$size)
print(i/nrow(input_var))
}
[1] 5e-04
[1] 0.001
[1] 0.0015
[1] 0.002
[1] 0.0025
[1] 0.003
[1] 0.0035
[1] 0.004
[1] 0.0045
pairs(data)
cor(data)
p beta size eigenvalue2
p 1.000000e+00 -9.147063e-21 0.000000e+00 -0.5312247
beta -9.147063e-21 1.000000e+00 8.159342e-21 0.4406453
size 0.000000e+00 8.159342e-21 1.000000e+00 -0.4308528
eigenvalue2 -5.312247e-01 4.406453e-01 -4.308528e-01 1.0000000
A_dens 9.999629e-01 -1.040997e-04 2.837892e-03 -0.5324257
trace -5.318629e-01 -5.943376e-01 -2.002601e-03 -0.1202697
A_dens trace
p 0.9999628681 -0.531862883
beta -0.0001040997 -0.594337559
size 0.0028378923 -0.002002601
eigenvalue2 -0.5324256616 -0.120269676
A_dens 1.0000000000 -0.531765466
trace -0.5317654658 1.000000000
library(corrplot)
corrplot 0.84 loaded
cormat = cor(data, method = 'spearman')
corrplot(cormat, method = "ellipse", type = "lower")
names(data)
[1] "p" "beta" "size" "eigenvalue2"
[5] "A_dens" "trace"
Let’s look at a 3d Output
# plot3d(data$beta, data$A_dens, data$eigenvalue2, size = 2)
names(data)
[1] "p" "beta" "size" "eigenvalue2"
[5] "A_dens" "trace"
library(plotly)
d <- data[sample(1:nrow(data), 1000),]
d <- data
# d$beta <- log(d$beta)
fig <- plot_ly(d, x = ~A_dens, y = ~beta, z = ~eigenvalue2)
fig <- fig %>% add_markers(size = 1)
fig <- fig %>% layout(scene = list(xaxis = list(title = 'Density'),
yaxis = list(title = 'Beta'),
zaxis = list(title = 'E2')))
fig
NA
names(data)
[1] "p" "beta" "size" "eigenvalue2"
[5] "A_dens" "trace"
library(plotly)
d <- data[sample(1:nrow(data), 1000),]
fig <- plot_ly(d, x = ~A_dens, y = ~trace, z = ~eigenvalue2)
fig <- fig %>% add_markers(size = 1)
fig <- fig %>% layout(scene = list(xaxis = list(title = 'Density'),
yaxis = list(title = 'trace'),
zaxis = list(title = 'E2')))
fig
NA
Clearly I should be able to model this.
Let’s look at density, it’s continuous so I should be able to take splices of A_dens.
A_dens is dependent on p, so I’ll just use different ps
p <- seq(from = 0.1, to = 0.95, length.out = 5)
beta <- seq(from = 1, to = 10, length.out = 1000)
size = 100
input_var <- expand.grid("p" = p, "beta" = beta, "size" = size)
input_var
nc <- length(random_graph(1, 1, 1))
Y <- matrix(ncol = nc, nrow = nrow(input_var))
for (i in 1:nrow(input_var)) {
X <- as.vector(input_var[i,])
Y[i,] <- random_graph(X$p, X$beta, X$size)
print(i/nrow(input_var))
}
if (sum(abs(Y) != abs(Re(Y))) == 0) {
Y <- Re(Y)
}
nrow(input_var)
nrow(Y)
Y <- as.data.frame(Y); colnames(Y) <- c("eigenvalue2", "trace")
(data2 <- cbind(input_var, Y)) %>% head()
data2$p <- factor(data2$p)
ggplot(data2[30:nrow(data2),], mapping = aes(col = p, x = beta, y = eigenvalue2)) +
geom_point(size = 0.5) +
stat_smooth() +
scale_size_continuous(range = c(0.1,1)) +
labs(x = "Beta", y = TeX("Second Eigenvalue"), title = TeX("Second Eigenvalue given Matrix Density") ) +
guides(col = guide_legend("Link Density")) +
theme_bw()
These look logarithmic, let’s do a log transform:
ggplot(data2[30:nrow(data2),], mapping = aes(col = p, x = log(beta), y = eigenvalue2)) +
geom_point(size = 0.5) +
stat_smooth() +
scale_size_continuous(range = c(0.1,1)) +
labs(x = "Beta", y = TeX("Second Eigenvalue"), title = TeX("Second Eigenvalue given Matrix Density") ) +
guides(col = guide_legend("Link Density")) +
theme_bw()
Link density is a function of size so E2ln(Beta) is still pretty useful.
it seems that this is very nearly linear for low densities, the web would have a low density, for example ba graphs appear bounded above by 1/2:
n_vec <- 0:20
m <- 3
y <- c()
for (n in n_vec) {
x <- sample_pa(n = n, power = 3, m = m) %>%
get.adjacency() %>%
mean()
y <- c(y, x)
}
plot(n_vec, y)
length(n_vec)
length(y)
So considering small Densities is wise, let’s have a look.
So at this stage I would like to fit a model over what we have in the first plotly diagram, but, I’m going to look at ba graphs in case there is something more insightful.
ggplot(data2[30:nrow(data2),], mapping = aes(col = factor(p), x = log(beta*p), y = eigenvalue2)) +
geom_point(size = 0.5) +
stat_smooth() +
scale_size_continuous(range = c(0.1,1)) +
labs(x = "Beta", y = TeX("Second Eigenvalue"), title = TeX("Second Eigenvalue given Matrix Density") ) +
guides(col = guide_legend("Link Density")) +
theme_bw()
It seems for very low densities that it is linear and then logarithmic for moderate densities. BA models have density~=m/n so for the BA model we’ll just use beta.